#########################################################################################################
# Analysis File for When Economic Elites Support Democracy
#########################################################################################################

########################################################################
# --- load data and packages ---
########################################################################

rm(list = ls())

#install.packages('pacman')
pacman::p_load(dplyr, estimatr, conflicted, tidyr, stringr, tidyverse,
               ggplot2, stargazer, modelsummary, kableExtra)

options("modelsummary_format_numeric_latex" = "plain")
options(modelsummary_factory_default = 'kableExtra')
options(modelsummary_factory_latex = 'kableExtra')
options(modelsummary_factory_html = 'kableExtra')

conflicts_prefer(reshape2::melt, .quiet = TRUE)
conflicts_prefer(dplyr::summarize, .quiet = TRUE)
conflicts_prefer(dplyr::group_by, .quiet = TRUE)
conflicts_prefer(dplyr::filter, .quiet = TRUE)
conflicts_prefer(dplyr::mutate, .quiet = TRUE)
conflicts_prefer(dplyr::desc)

# set working directory to the script location
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

sessionInfo()

# read in data
dat <- read.csv("raw_data/elites_democratization.csv", stringsAsFactors = FALSE)
committee_members <- read.csv("raw_data/prodem_committee_members.csv") 

source("elites_democratization_clean.R")

options(scipen = 999)

sessionInfo()

########################################################################
# --- define the study group ---
########################################################################

datfull <- dat  

# remove units
    # 1.  from provinces for which department boundaries could not be defined (la rioja, jujuy) 
    # 2. from provinces for which the probability of treatment was zero (san luis) 
    # 3. created within large cities that cannot be linked with outcome measures (n=11)

dat <- dat %>%
  filter(!province %in% c("san luis", "la rioja", "jujuy")) %>%
  filter(part == 0)

########################################################################
#--- descriptive statistics ---
########################################################################

# share of all departments with a pro-RSP committee; referenced in main text, Section 3.1
round(sum(datfull$rsp_bin, na.rm = TRUE) / nrow(datfull), digits = 2)

# share of departments in the study group with a pro-RSP committee; referenced in main text, Section 3.1
round(mean(dat$rsp_bin, na.rm = TRUE), digits = 2)

known <- committee_members %>%
              group_by(province) %>%
              summarise(
                Found = sum(!is.na(profession)),
                Total = length(profession),
                .groups = "drop"
              )
# the number of pro-RSP committee members whose occupations could be identified; referenced in main text, Section 3.1 and supplementary materials, Section F
known 

# share of departments with a pro-RSP committee among those that did not send local chapter to conferences on labor cooptation 
round(mean(dat$rsp_bin[dat$liga_bin == 0], na.rm = TRUE), digits = 2) # referenced in main text, Section 4

# median of the distribution of rented farms that employ sharecropping; referenced in main text, Section 4
round(median(dat$sharecrop_rentfarm_c95, na.rm = TRUE), digits = 2) 

########################################################################
# --- Figure 3: occupational categories of pro-RSP committee members ---
########################################################################

# The occupational categories of pro-RSP committee president(s) and vice-president(s) in Buenos Aires and Santiago del Estero. 

committee_members <- committee_members %>% filter(!is.na(profession)) #individuals whose professions could be identified

work_categories <- c("Elite", "Professional", "Public", "Other")

# table tallying occupations in each province
tab_occup <- committee_members %>%
  pivot_longer(cols = all_of(work_categories), names_to = "Category", values_to = "value") %>%
  mutate(Category = recode(Category,
                           "Other" = "Other",
                           "Public" = "Public\nOfficials",
                           "Professional" = "Professionals",
                           "Elite" = "Landed, Commercial,\nand Industrial Elites")) %>%
  group_by(province, Category) %>%
  summarise(number = sum(value, na.rm = TRUE),
            total = n(),
            .groups = "drop") %>%
  mutate(prop_occup = number / total,
         Category = factor(Category, levels = rev(c("Landed, Commercial,\nand Industrial Elites",
                                                    "Professionals", "Public\nOfficials", "Other"))))

# Save Figure 3 
pdf(file = "figures/fig3.pdf", width = 6, height = 4)
ggplot(data=tab_occup, aes(x=Category, y=prop_occup)) +
  facet_wrap(~province) +
  geom_bar(stat="identity", color = "black",
           fill = "#555555",
           position = position_dodge(width=0.9)) +
  labs(y = "Proportion", x = "Occupational Category") +
  theme_minimal() +
  theme(axis.title.x=element_text(colour="black"),
        axis.title.y=element_text(colour="black"),
        axis.text.y.left = element_text(color = "black"),
        axis.text.x.bottom = element_text(color = "black")) +
  coord_flip()
dev.off()

########################################################################
# --- Table 1 --- 
########################################################################

# main results; the effect of exclusion on the presence of pro-democracy committees

rsp <- dat

mod1.tab1 <- lm_robust(rsp_bin ~ treat, 
                  fixed_effects = ~ province,
                  weights = ipw,
                  clusters = clust_uni,
                  se_type = "stata",
                  data = rsp)

mod2.tab1 <- lm_robust(rsp_bin ~ treat, 
                       fixed_effects = ~ province,
                       weights = ipw,
                       clusters = clust_uni,
                       se_type = "stata",
                       data = rsp[rsp$region == "provinces",])

mod3.tab1 <- lm_robust(rsp_nbr ~ treat, 
                          fixed_effects = ~ province,
                          weights = ipw,
                          clusters = clust_uni,
                          se_type = "stata",
                          data = rsp)

mod4.tab1 <- lm_robust(rsp_nbr ~ treat, 
                       fixed_effects = ~ province,
                       weights = ipw,
                       clusters = clust_uni,
                       se_type = "stata",
                       data = rsp[rsp$region == "provinces",])

# Define model list
models <- list(
  "(1)" = mod1.tab1,
  "(2)" = mod2.tab1,
  "(3)" = mod3.tab1,
  "(4)" = mod4.tab1
)

# Add the row to indicate the sample that is being used and the number of clusters row manually
custom_rows <- data.frame(
  V1 = c("\\rule{0pt}{1.2em} Sample", 
         "Num. Clusters"),
  V2 = c("\\makecell{Provinces \\\\ \\& City of Buenos Aires}", models[[1]]$nclusters),
  V3 = c("Provinces",  models[[2]]$nclusters),
  V4 = c("\\makecell{Provinces \\\\ \\& City of Buenos Aires}", models[[3]]$nclusters),
  V5 = c("Provinces", models[[4]]$nclusters),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

title <- "The Effect of Exclusion from the Ruling Coalition on Elite Support for Democratization \\label{tab:mainres}"

# Create modelsummary LaTeX output
tab1 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("treat" = "Excluded"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  fmt = fmt_decimal(digits = 2),
  notes_append = FALSE,
  add_rows = custom_rows, book_tabs = T
) 

tab1 <- tab1  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1, "(3)" = 1, "(4)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 2, "Count" = 2)) %>%
  add_header_above(c(" ", "Committee for Pro-Democracy Politician" = 4),
                   italic = TRUE)

# modifications to the table
lines <- strsplit(tab1, "\n")[[1]]

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{5}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# Put the table back together
tab1 <- paste(lines, collapse = "\n")

writeLines(tab1, "../dataverse/tables/tab1.tex")

########################################################################
# --- Table 2 --- 
########################################################################

# The effect of exclusion on attending conferences on worker co-optation

liga <- dat

# the outcome variable in the City of Buenos Aires was recorded using the borders of 
    # police precincts, which could not be mapped onto the borders of the electoral districts 
    # created through the 1902 electoral law. Consequently, these units
    # are not included in the analysis. 

liga <- filter(liga, !(province %in% c("federal district")))

#binary and committee analysis, whole sample -----
mod1.tab2 <- lm_robust(liga_bin ~ treat, 
                  fixed_effects = ~ province,
                  weights = ipw,
                  clusters = clust_uni,
                  se_type = "stata",
                  data = liga)

mod2.tab2 <- lm_robust(liga_chapter_nbr ~ treat, 
                       fixed_effects = ~ province,
                       weights = ipw,
                       clusters = clust_uni,
                       se_type = "stata",
                       data = liga)

# Define model list
models <- list(
  "(1)" = mod1.tab2,
  "(2)" = mod2.tab2
)

# Add the row to indicate the sample that is being used and the number of clusters row manually
custom_rows <- data.frame(
  V1 = c("Num. Clusters"),
  V2 = c(as.character(models[[1]]$nclusters)),
  V3 = c(as.character(models[[2]]$nclusters)),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

## start the table output 
title <- "The Effect of Exclusion from the Ruling Coalition on Attending Conferences on Worker Co-optation \\label{tab:league}"

# Create modelsummary output for Latex
tab2 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("treat" = "Excluded"),
  fmt = fmt_decimal(digits = 2),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  notes_append = FALSE,
  add_rows = custom_rows, book_tabs = T
) 

tab2 <- tab2  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 1, "Count" = 1),
                   line = TRUE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Local Chapters Attending Patriotic League Conferences" = 2),
                   italic = TRUE)

# Split into lines
lines <- strsplit(tab2, "\n")[[1]]

# Standardize cmidrule spacing to ensure equal-length lines under headers
for (col in 2:(length(models) + 1)) {
  pattern <- sprintf("\\\\cmidrule\\(l\\{3pt\\}r\\{3pt\\}\\)\\{%d-%d\\}", col, col)
  replacement <- sprintf("\\\\cmidrule(lr){%d-%d}", col, col)
  lines <- gsub(pattern, replacement, lines)
}

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{3}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# Put the table back together
tab2 <- paste(lines, collapse = "\n")

writeLines(tab2, "../dataverse/tables/tab2.tex")

########################################################################
# --- Figure 4 --- 
########################################################################

# Perform HTE analysis of main effects by high/low labor control potential (sharecrop_rentfarm_c95)

# remove
    # 1. units from the City of Buenos Aires is removed from this analysis because it was not included in the rural component of the 1895 census 
    # 2. department of Gualilan (province San Juan), which did not report measures of labor control 

rsp_hte <- dat %>%
  filter(province != "federal district") %>%
  filter(!is.na(sharecrop_rentfarm_c95))

high <- rsp_hte %>% filter(sharecrop_rent_bin == "High")
low <- rsp_hte %>% filter(sharecrop_rent_bin == "Low")

mod1.fig4 <- lm_robust(rsp_bin ~ treat, 
                       fixed_effects = ~(province),
                       clusters = clust_uni,
                       weights = ipw, 
                       se_type = "stata",
                       data = high)

mod2.fig4 <- lm_robust(rsp_bin ~ treat, 
                       fixed_effects = ~(province),
                       weights = ipw, 
                       clusters = clust_uni,
                       se_type = "stata",
                       data = low)

# Prepare plot-level labels and significance markers
rsp_hte <- rsp_hte %>%
  mutate(htelabel = if_else(treat == 0, "Included", "Excluded"),
         htelabel = factor(htelabel, levels = rev(c("Excluded", "Included"))),
         sharecrop_rent_bin = factor(case_when(sharecrop_rent_bin == "High" ~ "Labor Concerns High",
                                               sharecrop_rent_bin == "Low" ~ "Labor Concerns Low",
                                               TRUE ~ as.character(sharecrop_rent_bin)),
                                     levels = c("Labor Concerns Low", "Labor Concerns High")))

# Significance stars
sig_from_model <- function(model) {
  df <- as.data.frame(model[5])
  df %>% mutate(sigstar = case_when(
    p.value > 0.1 ~ "",
    p.value > 0.05 ~ "*",
    p.value > 0.01 ~ "**",
    !is.na(p.value) ~ "***",
    TRUE ~ ""
  ))
}

highsig <- sig_from_model(mod1.fig4)
lowsig  <- sig_from_model(mod2.fig4)

fig_sig <- tibble(
  label = c(as.character(lowsig[2]), as.character(highsig[2])),
  htelabel = factor(c("Excluded", "Included"), levels = c("Excluded", "Included")),
  sharecrop_rent_bin = factor(c("Labor Concerns Low", "Labor Concerns High"),
                              levels = c("Labor Concerns Low", "Labor Concerns High"))
)

pdf(file = "figures/fig4.pdf", 
    width = 4, height = 3.5)
ggplot(rsp_hte, aes(x = as.factor(htelabel), y = rsp_bin,
                    fill = as.factor(sharecrop_rent_bin))) +
  facet_wrap(~sharecrop_rent_bin) +
  geom_bar(stat = "summary", fun = "mean",
           color = "black",
           position = position_dodge(width=0.9)) +
  geom_errorbar(aes(linewidth=2), stat = 'summary', 
                position = position_dodge(width=0.9), 
                color = "black", width = 0.25, linewidth = 0.5) +
  geom_segment(aes(x=1, y=0.439, xend=1.2, yend=0.459), colour="darkgray") +
  geom_segment(aes(x=1.2, y=0.459, xend=1.8, yend=0.459), colour="darkgray") +
  geom_segment(aes(x=1.8, y=0.459, xend=2, yend=0.439), colour="darkgray") + 
  geom_text(data = fig_sig, aes(label = label, x = 1.5, y = 0.465)) +
  scale_fill_manual(values = gray(1:3/3)) +
  theme(axis.title.y=element_text(colour="black"),
        axis.text.y.left = element_text(color = "black"),
        axis.text.x.bottom = element_text(color = "black"),
        strip.text.x = element_blank()) +
  labs(x = "", y = "Presence of a\nPro-Democracy Committee", fill = "Potential for Labor Control") +
  theme_minimal() +
  theme(legend.position="none") 
dev.off()

########################################################################
## --- Table A1 --- ##
########################################################################

# Descriptive statistics of relevant variables
descriptives <- dat %>% select(capital_c95, population_c95, urban_prop_c95, 
                               arg_men_prop_c95, national_guard_prop_c95,
                               realest_prop_c95, realest_arg_prop_c95, news_number_c95, vehicles_c95,
                               sharecrop_allfarm_c95, sharecrop_rentfarm_c95, ag_acre, 
                               rsp_bin, rsp_nbr, liga_bin, liga_chapter_nbr)

# Make sure all columns are numeric
descriptives_clean <- descriptives %>%
  mutate(across(everything(), as.numeric))

desc <- stargazer(descriptives, summary.stat = c("mean", "sd", "min", "max", "n"),
          digits=2, 
          title = "Summary Statistics", label = "tab:summstats",
          covariate.labels = c("Provincial Capital", "Population", 
                               "Urban Population", "Argentinean Men", 
                               "Enrolled in National Guard", "Property\\\\Owners", "Argentinean Property\\\\Owners", 
                               "Newspapers", "Vehicles", 
                               "\\makecell[l]{Sharecropping Among\\\\All Farms}",
                               "\\makecell[l]{Sharecropping Among\\\\Rented Farms}",
                               "Agriculture (acres)", "\\makecell[l]{Committee for Pro-Democracy\\\\Politician - Binary}",
                               "\\makecell[l]{Committee for Pro-Democracy\\\\Politician - Count}",
                               "Chapter of Patriotic League - Binary", "Chapter of Patriotic League - Count"),
          notes.label = "", notes.align = "c",
          notes = "\\footnotesize{Summary statistics of relevant variables.}",
          type = "latex")

writeLines(desc, "../dataverse/tables/tabA1.tex")

########################################################################
## --- Table D1 --- ##
########################################################################

# In each province, show number of electoral districts created through the 1902 
  # law and how many were scheduled to be filled in the upcoming general election

# aggregate data to the electoral district
district <- datfull %>% select(province, clust_uni, treat_strict)
district <- unique(district) 

dist <- district %>% 
  group_by(province) %>%
  summarize(created = n_distinct(clust_uni), 
            filled = created - sum(treat_strict)) 

dist$province <- str_to_title(dist$province) 

# Add total row
dist <- dist %>%
  bind_rows(tibble(
    province = "\\textbf{Total}",
    created = NA,
    filled = NA))

dist$created[length(dist$created)] <- paste("\\textbf{", sum(dist$created, na.rm = TRUE), "}", sep= "")
dist$filled[length(dist$filled)] <- paste("\\textbf{", sum(dist$filled, na.rm = TRUE), "}", sep= "")

colnames(dist) <- c("Province", "Nbr. of Districts", "\\makecell{Seats Scheduled to be Filled\\\\in the Next General Election}")

# Define LaTeX title & notes# Define title and notes
caption_text <- "Single-Member Districts Created Through the 1902 Electoral Law.\\label{tab:D1}"

# Write the table to file
table_tex <- datasummary_df(dist,
                output = "latex",
                title = caption_text,
                escape = FALSE,
                fmt = 0,
                align = "lcc")

# Write table
writeLines(table_tex, "../dataverse/tables/tabD1.tex")

########################################################################
## --- Table E.1 --- ##
########################################################################

# Show balance on pre-treatment covariates from 1895 Census

bal <- dat

# the administrative boundaries within the City of Buenos Aires changed between 
  # the 1895 census and the year of treatment assignment. As a result, units in the 
  # city are not included in the balance tests. The sample for the balance tests
  # is thus equivalent to the analysis in Table 1 that includes only Argentina's provinces 
  # (columns 2 and 4) and the analysis in Table 2.

bal <- bal %>% filter(province != "federal district") 

arg_men <- lm_robust(arg_men_prop_c95 ~ treat, 
                     fixed_effects = ~ province,
                     clusters = clust_uni,
                     weights = ipw, 
                     se_type = "stata",
                     data = bal)

urban <- lm_robust(urban_prop_c95 ~ treat, 
                   fixed_effects = ~ province,
                   clusters = clust_uni,
                   weights = ipw, 
                   se_type = "stata",
                   data = bal)

population <- lm_robust(population_c95 ~ treat, 
                        fixed_effects = ~ province,
                        clusters = clust_uni,
                        weights = ipw, 
                        se_type = "stata",
                        data = bal)

prov_cap <- lm_robust(capital_c95 ~ treat, 
                      fixed_effects = ~ province,
                      clusters = clust_uni,
                      weights = ipw, 
                      se_type = "stata",
                      data = bal)

literate <- lm_robust(lit_prop_c95 ~ treat, 
                          fixed_effects = ~ province,
                          clusters = clust_uni,
                          weights = ipw, 
                          se_type = "stata",
                          data = bal)

vehicles <- lm_robust(vehicles_c95 ~ treat, 
                     fixed_effects = ~ province,
                     clusters = clust_uni,
                     weights = ipw, 
                     se_type = "stata",
                     data = bal)

news_number <- lm_robust(news_number_c95 ~ treat, 
                         fixed_effects = ~ province,
                         clusters = clust_uni,
                         weights = ipw, 
                         se_type = "stata",
                         data = bal)

natguard <- lm_robust(national_guard_prop_c95 ~ treat, 
                      fixed_effects = ~ province,
                      clusters = clust_uni,
                      weights = ipw, 
                      se_type = "stata",
                      data = bal)

property_all <- lm_robust(realest_prop_c95 ~ treat, 
                     fixed_effects = ~ province,
                     clusters = clust_uni,
                     weights = ipw, 
                     se_type = "stata",
                     data = bal)

property_arg <- lm_robust(realest_arg_prop_c95 ~ treat, 
                     fixed_effects = ~ province,
                     clusters = clust_uni,
                     weights = ipw, 
                     se_type = "stata",
                     data = bal)

ag_acre <- lm_robust(ag_acre ~ treat, 
                          fixed_effects = ~ province,
                          clusters = clust_uni,
                          weights = ipw, 
                          se_type = "stata",
                          data = bal)

sharecrop_commfarm <- lm_robust(sharecrop_rentfarm_c95 ~ treat, 
                                fixed_effects = ~ province,
                                clusters = clust_uni,
                                weights = ipw, 
                                se_type = "stata",
                                data = bal)

sharecrop_allfarm <- lm_robust(sharecrop_allfarm_c95 ~ treat, 
                               fixed_effects = ~ province,
                               clusters = clust_uni,
                               weights = ipw, 
                               se_type = "stata",
                               data = bal)

agprop <- lm_robust(total_farms_c95 ~ treat, 
                    fixed_effects = ~ province,
                    clusters = clust_uni,
                    weights = ipw, 
                    se_type = "stata",
                    data = bal)

## prepare the table 

# Group models into named lists by panel
panel_a <- list(
  "Provincial Capital" = prov_cap,
  "Share Urban" = urban,
  "Share Argentinean Men in Population" = arg_men,
  "Share of Population Enrolled in National Guard" = natguard,
  "Population" = population
  )

panel_b <- list(
  "Share Literate in Population" = literate,
  "Share Property Owners in Population" = property_all,
  "Share Argentinean Property Owners in Population" = property_arg,
  "Number of Newspapers" = news_number,
  "Number of Vehicles" = vehicles 
)

panel_c <- list(
  "Number of Agricultural Properties" = agprop,
  "Sharecropping Among Rented Agric. Properties" = sharecrop_commfarm,
  "Sharecropping Among All Agric. Properties" = sharecrop_allfarm,
  "Agriculture (acres)" = ag_acre
)

# Combine panels 
combine_panels <- function(models, panel_name) {
  map2_dfr(models, names(models), function(model, varname) {
    tidy(model) %>%
      filter(term == "treat") %>%
      mutate(
        Variable = varname,
        Estimate = round(estimate, 2),
        SE = round(std.error, 2),
        `p-value` = round(p.value, 2),
        Panel = panel_name
      ) %>%
      select(Panel, Variable, Estimate, SE, `p-value`)
  })
}

df_all <- bind_rows(
  combine_panels(panel_a, "Panel A: Administrative and Demographic Covariates"),
  combine_panels(panel_b, "Panel B: Development Covariates"),
  combine_panels(panel_c, "Panel C: Agricultural Covariates")
)

# Write LaTeX file
write_balance_latex <- function(df, file_path) {
  lines <- c()
  lines <- c(lines,
             "\\begin{table}[H]",
             "\\centering",
             "\\caption{Balance Tests}",
             "\\label{tab:e1}",
             "\\begin{tabular}{lccc}",
             "\\toprule",
             " & \\textbf{Estimate} & \\textbf{SE} & \\textbf{p-value} \\\\",
             "\\midrule")
  
  current_panel <- NULL
  for (i in seq_len(nrow(df))) {
    row <- df[i, ]
    panel_label <- as.character(row$Panel)
    
    if (!identical(panel_label, current_panel)) {
      if (!is.null(current_panel)) {
        lines <- c(lines, "\\midrule")  # ← Add a horizontal line before new panel
      }
      lines <- c(lines, paste0("\\multicolumn{4}{l}{\\textbf{", panel_label, "}} \\\\"))
      current_panel <- panel_label
    }
    
    line <- paste0(row$Variable, " & ", row$Estimate, " & ", row$SE, " & ", row$`p-value`, " \\\\")
    lines <- c(lines, line)
  }
  
  lines <- c(lines,
             "\\bottomrule",
             "\\end{tabular}",
             "\\vspace{2mm}",
             "\\begin{minipage}{0.9\\textwidth}",
             "\\singlespacing",  
             "\\footnotesize",
             "* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Coefficients are derived from an OLS regression with fixed effects and inverse probability weights to account for the unequal probability of treatment across provinces. The unit of analysis is the department. Standard errors are clustered at the level of treatment assignment, the electoral district. The City of Buenos Aires is excluded from the analysis due to changes in department borders between the 1895 Census and the 1902 electoral law. \\textit{Source}: Argentina (1898).",
             "\\end{minipage}",
             "\\end{table}")
  
  writeLines(lines, file_path)
}

# Export to LaTeX file
write_balance_latex(df_all, "../dataverse/tables/tabE1.tex")

########################################################################
## --- Table E2 --- ##
########################################################################

# Main analyis (Table 1) when data is aggregated to the electoral district, 
  #(instead of the department)

dist <- rsp %>% 
  group_by(province, circ, region, clust_uni) %>%
  summarize(rsp_nbr = sum(rsp_nbr),
            treat = mean(treat), 
            ipw = mean(ipw))

dist$rsp_bin <- ifelse(dist$rsp_nbr == 0, 0, 1) # recode binary indicator because the data was aggregated

#now run analysis at the district level:
mod1.tabE2 <- lm_robust(rsp_bin ~ treat, 
                        fixed_effects = ~ province,
                        weights = ipw, 
                        se_type = "stata",
                        data = dist)

mod2.tabE2 <- lm_robust(rsp_bin ~ treat, 
                        fixed_effects = ~ province,
                        weights = ipw, 
                        se_type = "stata",
                        data = dist[dist$region == "provinces",])

mod3.tabE2 <- lm_robust(rsp_nbr ~ treat, 
                        fixed_effects = ~ province,
                        weights = ipw, 
                        se_type = "stata",
                        data = dist)

mod4.tabE2 <- lm_robust(rsp_nbr ~ treat, 
                        fixed_effects = ~ province,
                        weights = ipw, 
                        se_type = "stata",
                        data = dist[dist$region == "provinces",])

# Define model list
models <- list(
  "(1)" = mod1.tabE2,
  "(2)" = mod2.tabE2,
  "(3)" = mod3.tabE2,
  "(4)" = mod4.tabE2
)

## build the table output 

# Define row to indicate sample that is being used
row <- data.frame("\\rule{0pt}{1.2em} Sample", "\\makecell{Provinces \\\\ \\& City of Buenos Aires}", "Provinces", 
                  "\\makecell{Provinces \\\\ \\& City of Buenos Aires}", "Provinces")

attr(row, "position") <- 3

title <- "The Effect of Exclusion from the Ruling Coalition on Elite Support for Democratization: District-Level Analysis\\label{tab:E2}"

# Create modelsummary LaTeX output
tabE2 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',
  coef_map = c("treat" = "Excluded"),
  fmt = fmt_decimal(digits = 2),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  notes_append = FALSE,
  add_rows = row, book_tabs = T
) 

tabE2 <- tabE2  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1, "(3)" = 1, "(4)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 2, "Count" = 2)) %>%
  add_header_above(c(" ", "Committee for Pro-Democracy Politician" = 4),
                   italic = TRUE)

# Split into lines
lines <- strsplit(tabE2, "\n")[[1]]

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{5}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# Put the table back together
tabE2 <- paste(lines, collapse = "\n")

writeLines(tabE2, "../dataverse/tables/tabE2.tex")

########################################################################
## --- Table E3 --- ##
########################################################################

# reproduces the main analysis (table 1) using a strict measure of treatment that considers only districts 
  #scheduled to fill a seat in the upcoming general election 
  #(rather than districts that filled seats vacated early due to death or illness)  

mod1.tabe3 <- lm_robust(rsp_bin ~ treat_strict, 
                        fixed_effects = ~ province,
                        weights = ipw_strict,
                        clusters = clust_uni,
                        se_type = "stata",
                        data = rsp)

mod2.tabe3 <- lm_robust(rsp_bin ~ treat_strict, 
                        fixed_effects = ~ province,
                        weights = ipw_strict,
                        clusters = clust_uni,
                        se_type = "stata",
                        data = rsp[rsp$region == "provinces",])

mod3.tabe3 <- lm_robust(rsp_nbr ~ treat_strict, 
                        fixed_effects = ~ province,
                        weights = ipw_strict,
                        clusters = clust_uni,
                        se_type = "stata",
                        data = rsp)

mod4.tabe3 <- lm_robust(rsp_nbr ~ treat_strict, 
                        fixed_effects = ~ province,
                        weights = ipw_strict,
                        clusters = clust_uni,
                        se_type = "stata",
                        data = rsp[rsp$region == "provinces",])

# Define model list
models <- list(
  "(1)" = mod1.tabe3,
  "(2)" = mod2.tabe3,
  "(3)" = mod3.tabe3,
  "(4)" = mod4.tabe3
)

# Add the row to indicate the sample that is being used and the number of clusters row manually
custom_rows <- data.frame(
  V1 = c("\\rule{0pt}{1.2em} Sample", 
         "Num. Clusters"),
  V2 = c("\\makecell{Provinces \\\\ \\& City of Buenos Aires}", models[[1]]$nclusters),
  V3 = c("Provinces",  models[[2]]$nclusters),
  V4 = c("\\makecell{Provinces \\\\ \\& City of Buenos Aires}", models[[3]]$nclusters),
  V5 = c("Provinces", models[[4]]$nclusters),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

title <- "The Effect of an Alternative Coding of Exclusion from the Ruling Coalition on Elite Support for Democratization \\label{tab:e3}"

# Create modelsummary LaTeX output
tabe3 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("treat_strict" = "Excluded"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  fmt = fmt_decimal(digits = 2),
  notes_append = FALSE,
  add_rows = custom_rows, book_tabs = T
) 

tabe3 <- tabe3  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1, "(3)" = 1, "(4)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 2, "Count" = 2)) %>%
  add_header_above(c(" ", "Committee for Pro-Democracy Politician" = 4),
                   italic = TRUE)

# modifications to the table
lines <- strsplit(tabe3, "\n")[[1]]

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{5}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# Put the table back together
tabe3 <- paste(lines, collapse = "\n")

writeLines(tabe3, "../dataverse/tables/tabE3.tex")

########################################################################
## --- Table E4 --- ##
########################################################################

# Produces table output of Figure 4 (HTE analysis based on high/low labor control potential).
  #Figure 4 uses a binned measure of attending a conference on labor co-optation.  
  #This table adds a count measure of the outcome (total number of chapters)

# add count measure of outcome variable:
mod3.tabE4 <- lm_robust(rsp_nbr ~ treat, 
                        fixed_effects = ~(province),
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = low)

mod4.tabE4 <- lm_robust(rsp_nbr ~ treat, 
                        fixed_effects = ~(province),
                        clusters = clust_uni,
                        weights = ipw, 
                        se_type = "stata",
                        data = high)

# Define model list
models <- list(
  "(1)" = mod2.fig4,
  "(2)" = mod1.fig4,
  "(3)" = mod3.tabE4,
  "(4)" = mod4.tabE4
)

## build the table output 

custom_rows <- data.frame(
  V1 = c("Sharecropping among Rented Farms",
         "Num. Clusters"),
  V2 = c("Low", models[[1]]$nclusters),
  V3 = c("High", models[[2]]$nclusters),
  V4 = c("Low", models[[3]]$nclusters),
  V5 = c("High", models[[4]]$nclusters),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

title <- "Heterogeneous Treatment Effects by Labor Control Potential \\label{tab:E4}"

# Create modelsummary LaTeX output
tabE4 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("treat" = "Excluded"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  fmt = fmt_decimal(digits = 2),
  notes_append = FALSE,
  add_rows = custom_rows, book_tabs = T
) 

tabE4 <- tabE4  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1, "(3)" = 1, "(4)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 2, "Count" = 2)) %>%
  add_header_above(c(" ", "Committee for Pro-Democracy Politician" = 4),
                   italic = TRUE)

# Split into lines
lines <- strsplit(tabE4, "\n")[[1]]

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{5}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# fix formatting of column widths:
tabular_w <- grep("begin\\{tabular", lines) #find the line number
lines[tabular_w] <- "\\begin{tabular}{l*{4}{>{\\centering\\arraybackslash}p{5em}}}\\\\"


# Put the table back together
tabE4 <- paste(lines, collapse = "\n")

writeLines(tabE4, "../dataverse/tables/tabE4.tex")

########################################################################
## --- Table E5 --- ##
########################################################################

# HTE analysis using alternative measure of high/low labor control potential
  # that is based on sharecropping among ALL farm units (rather than only among rented)

high <- rsp_hte[rsp_hte$sharecrop_all_bin == "High",]
low <- rsp_hte[rsp_hte$sharecrop_all_bin == "Low",]

mod1.tabE5 <- lm_robust(rsp_bin ~ treat, 
                        fixed_effects = ~province,
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = low)

mod2.tabE5 <- lm_robust(rsp_bin ~ treat, 
                        fixed_effects = ~(province),
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = high)

mod3.tabE5 <- lm_robust(rsp_nbr ~ treat, 
                        fixed_effects = ~province,
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = low)

mod4.tabE5 <- lm_robust(rsp_nbr ~ treat,  
                        fixed_effects = ~province,
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = high)

# Define model list
models <- list(
  "(1)" = mod1.tabE5,
  "(2)" = mod2.tabE5,
  "(3)" = mod3.tabE5,
  "(4)" = mod4.tabE5
)

## build the table output 

custom_rows <- data.frame(
  V1 = c("Sharecropping among All Farms",
         "Num. Clusters"),
  V2 = c("Low", models[[1]]$nclusters),
  V3 = c("High", models[[2]]$nclusters),
  V4 = c("Low", models[[3]]$nclusters),
  V5 = c("High", models[[4]]$nclusters),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

title <- "Heterogeneous Treatment Effects by Labor Control Potential \\label{tab:e5}"

# Create modelsummary LaTeX output
tabE5 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("treat" = "Excluded"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  notes_append = FALSE,
  fmt = fmt_decimal(digits = 2),
  add_rows = custom_rows, book_tabs = T
) 

tabE5 <- tabE5  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1, "(3)" = 1, "(4)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 2, "Count" = 2)) %>%
  add_header_above(c(" ", "Committee for Pro-Democracy Politician" = 4),
                   italic = TRUE)

# Split into lines
lines <- strsplit(tabE5, "\n")[[1]]

# Find the line numbers
coef_line <- grep("Excluded", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{5}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# fix formatting of column widths:
tabular_w <- grep("begin\\{tabular", lines) #find the line number
lines[tabular_w] <- "\\begin{tabular}{l*{4}{>{\\centering\\arraybackslash}p{5em}}}\\\\"

# Put the table back together
tabE5 <- paste(lines, collapse = "\n")

writeLines(tabE5, "../dataverse/tables/tabE5.tex")

########################################################################
## --- Table E6 --- ##
########################################################################

# Perform descriptive analysis of relationship 
# between pro-democracy committee and attending conference on labor co-optation

mod1.tabE6 <- lm_robust(liga_bin ~ rsp_bin,
                        fixed_effects = ~ province,
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = liga)

mod2.tabE6 <- lm_robust(liga_chapter_nbr ~ rsp_bin,
                        fixed_effects = ~province,
                        weights = ipw, 
                        clusters = clust_uni,
                        se_type = "stata",
                        data = liga)

# Define model list
models <- list(
  "(1)" = mod1.tabE6,
  "(2)" = mod2.tabE6
)

# Add the row to indicate the sample that is being used and the number of clusters row manually
custom_rows <- data.frame(
  V1 = c("Num. Clusters"),
  V2 = c(as.character(models[[1]]$nclusters)),
  V3 = c(as.character(models[[2]]$nclusters)),
  stringsAsFactors = FALSE
)

# Insert the rows after the 3rd row of the main table (i.e., after coefficients and SEs)
attr(custom_rows, "position") <- 3

## start the table output 
title <- "OLS Regression of Co-optive Control on Elite Support for Democracy \\label{tab:e6}"

# Create modelsummary output for Latex
tabe6 <- modelsummary(
  models,
  title = title,
  col.names = NULL,
  output = "latex",
  booktabs = TRUE,
  escape = FALSE,
  gof_omit = 'AIC|BIC|RMSE|Within|FE|design|R2|condition|Num.Blocks|Std.Errors',  # doesn't remove design, condition, etc.
  coef_map = c("rsp_bin" = "\\makecell[l]{Committee for\\\\Pro-Democracy Candidate}"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  fmt = fmt_decimal(digits = 2),
  notes_append = FALSE,
  add_rows = custom_rows, book_tabs = T
) 

tabe6 <- tabe6  %>% 
  add_header_above(c(" " = 1, "(1)" = 1, "(2)" = 1),
                   line = FALSE,
                   bold = FALSE,
                   escape = FALSE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Binary Indicator" = 1, "Count" = 1),
                   line = TRUE,
                   line_sep = 0) %>%
  add_header_above(c(" ", "Local Chapters Attending Patriotic League Conferences" = 2),
                   italic = TRUE)

# Split into lines
lines <- strsplit(tabe6, "\n")[[1]]

# Standardize cmidrule spacing to ensure equal-length lines under headers
for (col in 2:(length(models) + 1)) {
  pattern <- sprintf("\\\\cmidrule\\(l\\{3pt\\}r\\{3pt\\}\\)\\{%d-%d\\}", col, col)
  replacement <- sprintf("\\\\cmidrule(lr){%d-%d}", col, col)
  lines <- gsub(pattern, replacement, lines)
}

# Find the line numbers
coef_line <- grep("Committee for", lines)[1]

# Insert a midrule directly above the coefficient row
if (!is.na(coef_line)) {
  lines <- append(lines, "\\midrule", after = coef_line - 1)
}

# Remove the first \toprule (horizontal line before header rows)
first_toprule <- which(grepl("\\\\toprule", lines))[1]

if (length(first_toprule) > 0) {
  lines <- lines[-first_toprule]
}

# wrap the note at the bottom 
bottom_caption <- grep("p \\$<\\$ 0.1", lines) #find the line number

lines[bottom_caption] <- "\\multicolumn{3}{l}{\\parbox[t]{\\textwidth}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.}}\\\\"

# Put the table back together
tabe6 <- paste(lines, collapse = "\n")

writeLines(tabe6, "../dataverse/tables/tabE6.tex")

### end of anlaysis code.
